// Modified from: https://github.com/ClickHouse/ClickHouse/blob/30fcaeb2a3fff1bf894aae9c776bed7fd83f783f/dbms/src/DataTypes/IDataType.h
//
// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <Common/COWPtr.h>
#include <Core/Field.h>

#include <boost/noncopyable.hpp>
#include <memory>


namespace DB
{
/// Types that this header mentions by name only; they are declared here
/// without pulling in their definitions.
class IColumn;
class IDataType;
class ReadBuffer;
class WriteBuffer;
struct FormatSettingsJSON;

/// Column handles as provided by COWPtr<IColumn>: `Ptr` and its mutable counterpart `MutablePtr`.
using MutableColumnPtr = COWPtr<IColumn>::MutablePtr;
using ColumnPtr = COWPtr<IColumn>::Ptr;

/// Data types are passed around as shared pointers to const objects.
using DataTypePtr = std::shared_ptr<const IDataType>;
/// Ordered list of data types.
using DataTypes = std::vector<DataTypePtr>;


/** Properties of data type.
  * Contains methods for serialization/deserialization.
  * Implementations of this interface represent a data type (example: UInt8)
  *  or parapetric family of data types (example: Array(...)).
  *
  * DataType is totally immutable object. You can always share them.
  */
class IDataType : private boost::noncopyable
{
public:
    /// Compile time flag. If false, then if C++ types are the same, then SQL types are also the same.
    /// Example: DataTypeString is not parametric: thus all instances of DataTypeString are the same SQL type.
    /// Example: DataTypeFixedString is parametric: different instances of DataTypeFixedString may be different SQL types.
    /// Place it in descendants:
    /// static constexpr bool is_parametric = false;

    /// Name of data type (examples: UInt64, Array(String)).
    virtual String getName() const { return getFamilyName(); }

    virtual TypeIndex getTypeId() const = 0;

    /// Name of data type family (example: FixedString, Array).
    virtual const char * getFamilyName() const = 0;

    /** Binary serialization for range of values in column - for writing to disk/network, etc.
      *
      * Some data types are represented in multiple streams while being serialized.
      * Example:
      * - Arrays are represented as stream of all elements and stream of array sizes.
      * - Nullable types are represented as stream of values (with unspecified values in place of NULLs) and stream of NULL flags.
      *
      * Different streams are identified by "path".
      * If the data type require single stream (it's true for most of data types), the stream will have empty path.
      * Otherwise, the path can have components like "array elements", "array sizes", etc.
      *
      * For multidimensional arrays, path can have arbiraty length.
      * As an example, for 2-dimensional arrays of numbers we have at least three streams:
      * - array sizes;                      (sizes of top level arrays)
      * - array elements / array sizes;     (sizes of second level (nested) arrays)
      * - array elements / array elements;  (the most deep elements, placed contiguously)
      *
      * Descendants must override either serializeBinaryBulk, deserializeBinaryBulk methods (for simple cases with single stream)
      *  or serializeBinaryBulkWithMultipleStreams, deserializeBinaryBulkWithMultipleStreams, enumerateStreams methods (for cases with multiple streams).
      *
      * Default implementations of ...WithMultipleStreams methods will call serializeBinaryBulk, deserializeBinaryBulk for single stream.
      */

    struct Substream
    {
        enum Type
        {
            ArrayElements,
            ArraySizes,

            NullableElements,
            NullMap,

            TupleElement,

            StringSizes,
        };
        Type type;

        /// Index of tuple element, starting at 1.
        String tuple_element_name;

        Substream(Type type) // NOLINT(google-explicit-constructor)
            : type(type)
        {}
    };

    using SubstreamPath = std::vector<Substream>;

    using StreamCallback = std::function<void(const SubstreamPath &)>;
    virtual void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const { callback(path); }
    void enumerateStreams(const StreamCallback & callback, SubstreamPath && path) const
    {
        enumerateStreams(callback, path);
    }
    void enumerateStreams(const StreamCallback & callback) const { enumerateStreams(callback, {}); }

    using OutputStreamGetter = std::function<WriteBuffer *(const SubstreamPath &)>;
    using InputStreamGetter = std::function<ReadBuffer *(const SubstreamPath &)>;

    /** 'offset' and 'limit' are used to specify range.
      * limit = 0 - means no limit.
      * offset must be not greater than size of column.
      * offset + limit could be greater than size of column
      *  - in that case, column is serialized till the end.
      * `position_independent_encoding` - provide better performance when it is false, but it requires not to be
      *     deserialized the data into a column with existing data.
      */
    virtual void serializeBinaryBulkWithMultipleStreams(
        const IColumn & column,
        const OutputStreamGetter & getter,
        size_t offset,
        size_t limit,
        bool /*position_independent_encoding*/,
        SubstreamPath & path) const
    {
        if (WriteBuffer * stream = getter(path))
            serializeBinaryBulk(column, *stream, offset, limit);
    }

    void serializeBinaryBulkWithMultipleStreams(
        const IColumn & column,
        const OutputStreamGetter & getter,
        size_t offset,
        size_t limit,
        bool position_independent_encoding,
        SubstreamPath && path) const
    {
        serializeBinaryBulkWithMultipleStreams(column, getter, offset, limit, position_independent_encoding, path);
    }

    /** Read no more than limit values and append them into column.
      * `avg_value_size_hint` - if not zero, may be used to avoid reallocations while reading column of String type.
      * `position_independent_encoding` - provide better performance when it is false, but it requires not to be
      *     deserialized the data into a column with existing data.
      */
    virtual void deserializeBinaryBulkWithMultipleStreams(
        IColumn & column,
        const InputStreamGetter & getter,
        size_t limit,
        double avg_value_size_hint,
        bool /*position_independent_encoding*/,
        SubstreamPath & path) const
    {
        if (ReadBuffer * stream = getter(path))
            deserializeBinaryBulk(column, *stream, limit, avg_value_size_hint);
    }

    void deserializeBinaryBulkWithMultipleStreams(
        IColumn & column,
        const InputStreamGetter & getter,
        size_t limit,
        double avg_value_size_hint,
        bool position_independent_encoding,
        SubstreamPath && path) const
    {
        deserializeBinaryBulkWithMultipleStreams(
            column,
            getter,
            limit,
            avg_value_size_hint,
            position_independent_encoding,
            path);
    }

    /** Override these methods for data types that require just single stream (most of data types).
      */
    virtual void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const;
    virtual void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint)
        const;

    /** Serialization/deserialization of individual values.
      *
      * These are helper methods for implementation of various formats to input/output for user (like TEXT, JSON, etc.).
      * There is no one-to-one correspondence between formats and these methods.
      * For example, TabSeparated and Pretty formats could use same helper method serializeTextEscaped.
      *
      * For complex data types (like arrays) binary serde for individual values may differ from bulk serde.
      * For example, if you serialize single array, it will be represented as its size and elements in single contiguous stream,
      *  but if you bulk serialize column with arrays, then sizes and elements will be written to separate streams.
      */

    /// There is two variants for binary serde. First variant work with Field.
    virtual void serializeBinary(const Field & field, WriteBuffer & ostr) const = 0;
    virtual void deserializeBinary(Field & field, ReadBuffer & istr) const = 0;

    /// Other variants takes a column, to avoid creating temporary Field object.
    /// Column must be non-constant.

    /// Serialize one value of a column at specified row number.
    virtual void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const = 0;
    /// Deserialize one value and insert into a column.
    /// If method will throw an exception, then column will be in same state as before call to method.
    virtual void deserializeBinary(IColumn & column, ReadBuffer & istr) const = 0;

    /** Text serialization with escaping but without quoting.
      */
    virtual void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr) const = 0;

    virtual void deserializeTextEscaped(IColumn & column, ReadBuffer & istr) const = 0;

    /** Text serialization as a literal that may be inserted into a query.
      */
    virtual void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr) const = 0;

    virtual void deserializeTextQuoted(IColumn & column, ReadBuffer & istr) const = 0;

    /** Text serialization for displaying on a terminal or saving into a text file, and the like.
      * Without escaping or quoting.
      */
    virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr) const = 0;

    /** Text serialization intended for using in JSON format.
      * force_quoting_64bit_integers parameter forces to brace UInt64 and Int64 types into quotes.
      */
    virtual void serializeTextJSON(
        const IColumn & column,
        size_t row_num,
        WriteBuffer & ostr,
        const FormatSettingsJSON & settings) const
        = 0;
    virtual void deserializeTextJSON(IColumn & column, ReadBuffer & istr) const = 0;

    /** Create empty column for corresponding type.
      */
    virtual MutableColumnPtr createColumn() const = 0;

    /** Create ColumnConst for corresponding type, with specified size and value.
      */
    ColumnPtr createColumnConst(size_t size, const Field & field) const;
    ColumnPtr createColumnConstWithDefaultValue(size_t size) const;

    /** Get default value of data type.
      * It is the "default" default, regardless the fact that a table could contain different user-specified default.
      */
    virtual Field getDefault() const = 0;

    /** Directly insert default value into a column. Default implementation use method IColumn::insertDefault.
      * This should be overriden if data type default value differs from column default value (example: Enum data types).
      */
    virtual void insertDefaultInto(IColumn & column) const;

    /// Checks that two instances belong to the same type
    virtual bool equals(const IDataType & rhs) const = 0;

    virtual ~IDataType() = default;

    /// Various properties on behaviour of data type.

    /** The data type is dependent on parameters and types with different parameters are different.
      * Examples: FixedString(N), Tuple(T1, T2), Nullable(T).
      * Otherwise all instances of the same class are the same types.
      */
    virtual bool isParametric() const = 0;

    /** The data type is dependent on parameters and at least one of them is another type.
      * Examples: Tuple(T1, T2), Nullable(T). But FixedString(N) is not.
      */
    virtual bool haveSubtypes() const = 0;

    /** Can appear in table definition.
      * Counterexamples: Interval, Nothing.
      */
    virtual bool cannotBeStoredInTables() const { return false; }

    /** In text formats that render "pretty" tables,
      *  is it better to align value right in table cell.
      * Examples: numbers, even nullable.
      */
    virtual bool shouldAlignRightInPrettyFormats() const { return false; }

    /** Does formatted value in any text format can contain anything but valid UTF8 sequences.
      * Example: String (because it can contain arbitary bytes).
      * Counterexamples: numbers, Date, DateTime.
      * For Enum, it depends.
      */
    virtual bool textCanContainOnlyValidUTF8() const { return false; }

    /** Is it possible to compare for less/greater, to calculate min/max?
      * Not necessarily totally comparable. For example, floats are comparable despite the fact that NaNs compares to nothing.
      * The same for nullable of comparable types: they are comparable (but not totally-comparable).
      */
    virtual bool isComparable() const { return false; }

    /** Does it make sense to use this type with COLLATE modifier in ORDER BY.
      * Example: String, but not FixedString.
      */
    virtual bool canBeComparedWithCollation() const { return false; }

    /** If the type is totally comparable (Ints, Date, DateTime, not nullable, not floats)
      *  and "simple" enough (not String, FixedString) to be used as version number
      *  (to select rows with maximum version).
      */
    virtual bool canBeUsedAsVersion() const { return false; }

    /** Values of data type can be summed (possibly with overflow, within the same data type).
      * Example: numbers, even nullable. Not Date/DateTime. Not Enum.
      * Enums can be passed to aggregate function 'sum', but the result is Int64, not Enum, so they are not summable.
      */
    virtual bool isSummable() const { return false; }

    /** Can be used in operations like bit and, bit shift, bit not, etc.
      */
    virtual bool canBeUsedInBitOperations() const { return false; }

    /** Can be used in boolean context (WHERE, HAVING).
      * UInt8, maybe nullable.
      */
    virtual bool canBeUsedInBooleanContext() const { return false; }

    /** Integers, floats, not Nullable. Not Enums. Not Date/DateTime.
      */
    virtual bool isNumber() const { return false; }

    /** Integers. Not Nullable. Not Enums. Not Date/DateTime.
      */
    virtual bool isInteger() const { return false; }
    virtual bool isUnsignedInteger() const { return false; }

    /** Floating point values. Not Nullable. Not Enums. Not Date/DateTime.
     */
    virtual bool isFloatingPoint() const { return false; }

    /** Date, DateTime, MyDate, MyDateTime. Not Nullable.
      */
    virtual bool isDateOrDateTime() const { return false; }

    /** MyDate, MyDateTime. Not Nullable.
      */
    virtual bool isMyDateOrMyDateTime() const { return false; }

    /** MyTime. Not Nullable.
     */
    virtual bool isMyTime() const { return false; }

    /** Decimal. Not Nullable.
      */
    virtual bool isDecimal() const { return false; }

    /** Numbers, Enums, Date, DateTime, MyDate, MyDateTime. Not nullable.
      */
    virtual bool isValueRepresentedByNumber() const { return false; }

    /** Integers, Enums, Date, DateTime, MyDate, MyDateTime. Not nullable.
      */
    virtual bool isValueRepresentedByInteger() const { return false; }

    /** Values are unambiguously identified by contents of contiguous memory region,
      *  that can be obtained by IColumn::getDataAt method.
      * Examples: numbers, Date, DateTime, String, FixedString,
      *  and Arrays of numbers, Date, DateTime, FixedString, Enum, but not String.
      *  (because Array(String) values became ambiguous if you concatenate Strings).
      * Counterexamples: Nullable, Tuple.
      */
    virtual bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const { return false; }

    virtual bool isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion() const
    {
        return isValueUnambiguouslyRepresentedInContiguousMemoryRegion()
            && (isValueRepresentedByNumber() || isFixedString());
    }

    virtual bool isString() const { return false; }
    virtual bool isFixedString() const { return false; }
    virtual bool isStringOrFixedString() const { return isString() || isFixedString(); }

    /** Example: numbers, Date, DateTime, FixedString, Enum... Nullable and Tuple of such types.
      * Counterexamples: String, Array.
      * It's Ok to return false for AggregateFunction despite the fact that some of them have fixed size state.
      */
    virtual bool haveMaximumSizeOfValue() const { return false; }

    /** Size in amount of bytes in memory. Throws an exception if not haveMaximumSizeOfValue.
      */
    virtual size_t getMaximumSizeOfValueInMemory() const { return getSizeOfValueInMemory(); }

    /** Throws an exception if value is not of fixed size.
      */
    virtual size_t getSizeOfValueInMemory() const;

    /** Integers (not floats), Enum, String, FixedString.
      */
    virtual bool isCategorial() const { return false; }

    virtual bool isEnum() const { return false; }

    virtual bool isNullable() const { return false; }
    /** Is this type can represent only NULL value? (It also implies isNullable)
      */
    virtual bool onlyNull() const { return false; }

    /** If this data type cannot be wrapped in Nullable data type.
      */
    virtual bool canBeInsideNullable() const { return false; }

    /// Updates avg_value_size_hint for newly read column. Uses to optimize deserialization. Zero expected for first column.
    static void updateAvgValueSizeHint(const IColumn & column, double & avg_value_size_hint);

    static String getFileNameForStream(const String & column_name, const SubstreamPath & path);

    static bool isNullMap(const SubstreamPath & path);
    static bool isArraySizes(const SubstreamPath & path);
    static bool isStringSizes(const SubstreamPath & path);
};


} // namespace DB
